*** Tables H1, I1, I2, I3 and figure I1.

********************************************************************************
* 
*                             Table H1
*
********************************************************************************

* Table H1 sample: load the year-3 file, run the shared construction blocks,
* and derive the variables summarised in the table.
use "${data}cleaned_data_y3.dta", clear
do "${blocks}construct_new_variables"
do "${blocks}variables_sample_as"

* Only observations with a recorded GCSE A* count are retained.
keep if GCSEAStarCount != .

* Female indicator; left missing when Gender is neither "Female" nor "Male".
generate female = cond(Gender == "Female", 1, cond(Gender == "Male", 0, .))

label variable std_maths_percent "Math score"
label variable maths_modules     "Math modules"

* A missing module count is recoded to zero.
replace maths_modules = 0 if maths_modules == .

* Standardise the GCSE A* count using the mean and SD of the retained sample.
egen mean_astar = mean(GCSEAStarCount)
egen sd_astar   = sd(GCSEAStarCount)
generate std_astar = (GCSEAStarCount - mean_astar) / sd_astar
label variable std_astar "General exams"

* For every EEMP x WinterPool x female cell, store the mean and its standard
* error of each measure in globals named <var>_<EEMP><WinterPool><female>
* and <var>_<EEMP><WinterPool><female>_se. The posting step further down
* reads these globals by name.
forvalues mi = 0/1 {
    forvalues pool = 0/1 {
        forvalues fem = 0/1 {
            foreach v in std_astar std_maths_percent maths_modules {
                display "`v'"
                ci mean `v' if EEMP == `mi' & WinterPool==`pool' & female==`fem'
                global `v'_`mi'`pool'`fem'    = r(mean)
                global `v'_`mi'`pool'`fem'_se = r(se)
            }
        }
    }
}

* Collect the cell statistics into a temporary dataset, one row per table line.
tempname covariates
tempfile covs
postfile `covariates' str100 statistic ///
    mi_da_m mi_da_f mi_p_m mi_p_f ///
    nmi_da_m nmi_da_f nmi_p_m nmi_p_f ///
    using "`covs'", replace

* Cell codes are <EEMP><WinterPool><female>, listed in the order of the
* posted columns (mi_da_m ... nmi_p_f).
local cells 100 101 110 111 000 001 010 011

* Build the "(value) (value) ..." argument lists for the mean row (m_<var>)
* and the standard-error row (s_<var>) of each measure from the stored globals.
foreach v in std_astar std_maths_percent maths_modules {
    local m_`v'
    local s_`v'
    foreach c of local cells {
        local m_`v' `m_`v'' (${`v'_`c'})
        local s_`v' `s_`v'' (${`v'_`c'_se})
    }
}

post `covariates' (`"General exams"') `m_std_astar'
post `covariates' (`"\quad Standard error"') `s_std_astar'

* Sub-heading row: label only, all numeric cells missing.
post `covariates' ("Advanced Math exams") (.) (.) (.) (.) (.) (.) (.) (.)

post `covariates' (`"\quad Math score"') `m_std_maths_percent'
post `covariates' (`"\quad \quad Standard error"') `s_std_maths_percent'

post `covariates' (`"\quad Math modules"') `m_maths_modules'
post `covariates' (`"\quad \quad Standard error"') `s_maths_modules'

postclose `covariates'

* Load the posted statistics and write them out as a LaTeX tabular.
use `covs', clear

* Two decimals for every numeric column.
format mi_da_m mi_da_f mi_p_m mi_p_f nmi_da_m nmi_da_f nmi_p_m nmi_p_f %9.2fc

* The column specification declares exactly the nine columns that are written
* (one label column plus eight numeric columns). The previous specification
* declared three additional columns (one `c' and two dcolumn `D' columns) that
* were never filled, which stretched the rules past the last data column and
* required the dcolumn package for no reason.
listtab * using "${tables}covariates_summary_2.tex", ///
    rstyle(tabular) replace ///
    head("\begin{tabular}{@{\extracolsep{2pt}} l c c c c c c c c}" ///
    "\toprule" ///
    " & \multicolumn{4}{c}{MI subjects} & \multicolumn{4}{c}{Non-MI subjects} \\" ///
    " & \multicolumn{2}{c}{Direct} & \multicolumn{2}{c}{From pool} & \multicolumn{2}{c}{Direct} & \multicolumn{2}{c}{From pool} \\" ///
    " & \multicolumn{1}{c}{M} & \multicolumn{1}{c}{F} & \multicolumn{1}{c}{M} & \multicolumn{1}{c}{F} & \multicolumn{1}{c}{M} & \multicolumn{1}{c}{F} & \multicolumn{1}{c}{M} & \multicolumn{1}{c}{F} \\" ///
    "\midrule" "& (1) & (2) & (3) & (4) & (5) & (6) & (7) & (8)  \\ \midrule") ///
    foot("\bottomrule" "\end{tabular}")
	
	

********************************************************************************
* 
*   									Table I1.
*
********************************************************************************

* Table I1 sample: reload the year-3 file and rebuild the derived variables.
use "${data}cleaned_data_y3.dta", clear
do "${blocks}construct_new_variables"
do "${blocks}variables_sample_as"

* Only observations with a recorded GCSE A* count are retained.
keep if GCSEAStarCount != .

* Gender indicators; both stay missing when Gender is neither "Female" nor "Male".
generate female = cond(Gender == "Female", 1, cond(Gender == "Male", 0, .))
generate male   = 1 - female
label variable male "Male"

label variable std_maths_percent "Math score"
label variable maths_modules     "Math modules"

* A missing module count is recoded to zero.
replace maths_modules = 0 if maths_modules == .

* Standardise the GCSE A* count using the mean and SD of the retained sample.
egen mean_astar = mean(GCSEAStarCount)
egen sd_astar   = sd(GCSEAStarCount)
generate std_astar = (GCSEAStarCount - mean_astar) / sd_astar
label variable std_astar "General exams"

* Three nested OLS specifications on the EEMP==1, WinterPool==0 sample, all with
* course and application-college dummies and robust standard errors:
*   r4: male indicator only
*   r5: adds the three qualification measures
*   r6: adds male x qualification interactions
local fe     i.Course_NatSci_split  i.ApplyCollege
local sample EEMP == 1 & WinterPool==0
local quals  std_astar std_maths_percent maths_modules

local spec4
local spec5 `quals'
local spec6 `quals' i.male#c.std_astar i.male#c.std_maths_percent i.male#c.maths_modules

foreach m in 4 5 6 {
    reg ns_std_perc_y1 i.male `spec`m'' `fe' if `sample', vce(robust)
    * Markers printed in the fixed-effects rows of the table.
    estadd local course_fe "\checkmark"
    estadd local apply_college_fe "\checkmark"
    eststo r`m'
}

* Export r4-r6: coefficients with standard errors in parentheses (two decimals),
* restricted to the male indicator, the qualification measures and their
* interactions, plus the two fixed-effects marker rows.
* NOTE(review): nonumbers appears twice and se(%9.2f) is passed alongside an
* explicit cells() layout; both are kept exactly as they were - confirm whether
* they are still needed.
esttab r4 r5 r6 using "${tables}covariates_2.tex", ///
    cells(b(pval(pval) fmt(2)) se(par fmt(2))) ///
    keep(std_astar std_maths_percent maths_modules 1.male ///
         1.male#c.std_astar 1.male#c.std_maths_percent 1.male#c.maths_modules) ///
    collabels(none) ///
    scalars("course_fe Subject FE" ///
            "apply_college_fe Application college FE") ///
    eqlabels(none) label varwidth(25) nonumbers ///
    posthead("& (1) & (2) & (3) \\ \midrule") ///
    nonumbers mlabels(none) se(%9.2f) ///
    gaps booktabs nonotes ///
    replace

********************************************************************************
* 
*                             Table I2
*
********************************************************************************

* Table I2 sample: reload the year-3 file and rebuild the derived variables.
clear
use "${data}cleaned_data_y3.dta"
do "${blocks}construct_new_variables"
do "${blocks}variables_sample_as"

* Only observations with a recorded GCSE A* count are retained.
drop if GCSEAStarCount == .

* Standardise the GCSE A* count using the mean and SD of the retained sample.
egen mean_astar = mean(GCSEAStarCount)
egen sd_astar   = sd(GCSEAStarCount)
gen std_astar   = (GCSEAStarCount - mean_astar) / sd_astar

* Keep complete cases on the three regressors. The variable name is written in
* full (maths_modules): the abbreviated form used previously only resolves when
* variable-name abbreviation is enabled and the prefix is unambiguous.
drop if std_astar==. |  std_maths_percent==. | maths_modules==.

* ---- Coefficient vectors, one regression per value of type -------------------
* b1/b2 hold e(b) (three slopes followed by the constant); the constants are
* also kept as scalars.
forvalues g = 1/2 {
    reg ns_std_perc_y1 std_astar std_maths_percent maths_modules if EEMP == 1 & type==`g'
    matrix b`g' = e(b)
    matrix list b`g'
    scalar b_cons_`g' = _b[_cons]
}

* ---- Differences in coefficients (type 2 minus type 1) -----------------------
matrix b_diff = b2 - b1
matrix list b_diff

matrix b_cons_diff = J(1,1,0)
matrix b_cons_diff[1,1] = b_cons_2 - b_cons_1
matrix list b_cons_diff

* Slope differences only (constant dropped).
matrix b_diff_noc = b_diff[1,1..3]
matrix list b_diff_noc

* ---- Covariate means by type --------------------------------------------------
* x1/x2 are 1x4: the three covariate means plus a 1 for the constant;
* x1_noc/x2_noc keep the three means only.
* NOTE(review): these means use every EEMP==1 observation of the group, while
* the regressions above drop rows with a missing ns_std_perc_y1. If such rows
* exist the decomposition will not add up exactly - confirm the outcome is never
* missing in this sample.
forvalues g = 1/2 {
    matrix x`g' = J(1,4,0)

    local col = 0
    foreach v in std_astar std_maths_percent maths_modules {
        local ++col
        summarize `v' if EEMP == 1 & type==`g'
        matrix x`g'[1,`col'] = (r(mean))
    }

    matrix x`g'[1,4] = 1
    matrix list x`g'

    matrix x`g'_noc = x`g'[1,1..3]
    matrix list x`g'_noc
}

matrix x_diff = x2 - x1
matrix list x_diff

* ---- Effects ------------------------------------------------------------------
forvalues g = 1/2 {
    * Productivity effect: group means times the slope differences.
    matrix pe_`g' = x`g'_noc * b_diff_noc'
    matrix list pe_`g'
    * Covariate effect: mean differences times the group's coefficients.
    matrix ce_`g' = x_diff * b`g''
    matrix list ce_`g'
}

* ---- Gap in performance (lower bound) ----------------------------------------
forvalues g = 1/2 {
    summarize ns_std_perc_y1  if EEMP == 1 & type==`g'
    scalar y`g' = r(mean)
    scalar list y`g'
}

scalar y_diff = y2 - y1
scalar list y_diff

* Collect the decomposition results into a temporary dataset: one row per
* statistic, one column per equation (x2 first, then x1).
tempname statistics
tempfile stats
postfile `statistics' str100 statistic x2 x1 using "`stats'", replace

* Outcome gap: the same value is reported in both columns.
local x2 = y_diff
local x1 = y_diff
post `statistics' ("Lower bound $\mathbb{E}[X_g]-\mathbb{E}[X_h]$")  (`x2') (`x1')

* Difference in intercepts: also identical across columns.
local x2 = b_cons_diff[1,1]
local x1 = b_cons_diff[1,1]
post `statistics' ("Difference in intercepts (\textit{g}-\textit{h})")  (`x2') (`x1')

* Productivity effect, evaluated at each group's covariate means.
forvalues z = 1/2 {
    local x`z' = pe_`z'[1,1]
}
post `statistics' ("Productivity effect")  (`x2') (`x1')

* Covariate effect, evaluated at each group's coefficients.
forvalues z = 1/2 {
    local x`z' = ce_`z'[1,1]
}
post `statistics' ("Covariate effect")  (`x2') (`x1')

postclose `statistics'

* Load the posted decomposition results and write them out as a LaTeX tabular.
use `stats', clear

* Three decimals for both numeric columns.
format x2 x1 %9.3fc

* The column specification declares exactly the three columns that are written
* (label, x2, x1). The previous specification also declared two dcolumn `D'
* columns that were never filled, which stretched the rules past the last data
* column and required the dcolumn package for no reason.
listtab * using "${tables}oxaca.tex", ///
    rstyle(tabular) replace ///
    head("\begin{tabular}{@{\extracolsep{2pt}} l c c}" ///
    "\toprule" ///
    " & \multicolumn{1}{c}{Top eq-n} & \multicolumn{1}{c}{Bottom eq-n} \\" ///
    "\midrule" "& (1) & (2) \\ \midrule") ///
    foot("\bottomrule" "\end{tabular}")
	
	
	
********************************************************************************
* 
*                             Table I3
*
********************************************************************************

* Table I3 sample: reload the year-3 file and rebuild the derived variables.
clear
use "${data}cleaned_data_y3.dta"
do "${blocks}construct_new_variables"
do "${blocks}variables_sample_as"

* Only observations with a recorded GCSE A* count are retained.
drop if GCSEAStarCount == .

* Standardise the GCSE A* count using the mean and SD of the retained sample.
egen mean_astar = mean(GCSEAStarCount)
egen sd_astar   = sd(GCSEAStarCount)
gen std_astar   = (GCSEAStarCount - mean_astar) / sd_astar
label var std_astar "General exams"

label var std_maths_percent "Math score"
label var maths_modules "Math modules"

* Keep complete cases on the three regressors. The variable name is written in
* full (maths_modules): the abbreviated form used previously only resolves when
* variable-name abbreviation is enabled and the prefix is unambiguous.
drop if std_astar==. |  std_maths_percent==. | maths_modules==.

* Direct-admission indicator: 1 when WinterPool==0, 0 when WinterPool==1,
* missing otherwise.
gen DirectAdmit=1 if WinterPool==0
replace DirectAdmit=0 if WinterPool==1

* Female indicator; left missing when Gender is neither "Female" nor "Male".
gen female=1 if Gender=="Female"
replace female=0 if Gender=="Male"


* Two regressions on the EEMP==1 sample with identical right-hand sides
* (qualification measures, course dummies, ApplyCollege absorbed):
*   r1: probability of direct admission (DirectAdmit)
*   r2: year-1 performance (ns_std_perc_y1)
local model = 0
foreach depvar in DirectAdmit ns_std_perc_y1 {
    local ++model
    areg `depvar' std_astar std_maths_percent maths_modules i.Course_NatSci_split if EEMP == 1, a(ApplyCollege)
    * Markers printed in the fixed-effects rows of the table.
    estadd local course_fe "\checkmark"
    estadd local apply_college_fe "\checkmark"
    eststo r`model'
}
 
* Export r1 and r2: coefficients with standard errors in parentheses (two
* decimals) for the three qualification measures, plus the fixed-effects rows.
* NOTE(review): nonumbers appears twice and se(%9.2f) is passed alongside an
* explicit cells() layout; both are kept exactly as they were - confirm whether
* they are still needed.
esttab r1 r2  using "${tables}qualifications.tex", ///
    cells(b(pval(pval) fmt(2)) se(par fmt(2))) ///
    keep(std_astar std_maths_percent maths_modules) ///
    collabels(none) ///
    scalars("course_fe Subject FE" ///
            "apply_college_fe Application college FE") ///
    eqlabels(none) label varwidth(25) nonumbers ///
    posthead("& Probability of direct admission & Year 1 performance \\" "& (1) & (2) \\ \midrule") ///
    nonumbers mlabels(none) se(%9.2f) ///
    gaps booktabs nonotes ///
    replace


********************************************************************************
* 
*                             Figure I1
*
********************************************************************************

* Figure I1, step 1: estimate the year-1 performance equation separately for
* each value of type and save the coefficients keyed by Gender.
use "${data}cleaned_data_y3.dta", clear
do "${blocks}construct_new_variables"
do "${blocks}variables_sample_as"

* Only observations with a recorded GCSE A* count are retained.
keep if GCSEAStarCount != .

* Gender indicators; both stay missing when Gender is neither "Female" nor "Male".
generate female = cond(Gender == "Female", 1, cond(Gender == "Male", 0, .))
generate male   = 1 - female
label variable male "Male"

* Standardise the GCSE A* count using the mean and SD of the retained sample.
egen mean_astar = mean(GCSEAStarCount)
egen sd_astar   = sd(GCSEAStarCount)
generate std_astar = (GCSEAStarCount - mean_astar) / sd_astar
label variable std_astar "General exams"

* One regression per type; the coefficients (_b_*) are written to a temp file.
statsby _b, by(type) saving("${temp}coefficients.dta", replace): ///
    reg ns_std_perc_y1 std_astar std_maths_percent maths_modules if EEMP == 1 & type~=., vce(robust)

* Re-key the coefficient file by Gender (type 1 -> "Female", type 2 -> "Male")
* so it can be merged onto the applicant file on Gender.
use "${temp}coefficients.dta", clear
generate Gender = cond(type == 1, "Female", cond(type == 2, "Male", ""))
drop type
sort Gender
save "${temp}coefficients.dta", replace

* Figure I1, step 2: predict year-1 performance from the saved coefficients and
* plot the distribution of predictions by Gender.
use "${data}merged_tripos.dta", clear

* NOTE(review): this writes an unmodified copy of merged_tripos.dta under the
* name y1predictions.dta before any predictions exist - confirm it is intended.
save "${temp}y1predictions.dta", replace

* Keep application years up to 2016 (a missing ApplyYear is dropped as well).
keep if ApplyYear <= 2016

do "${blocks}variables_sample_as"

* Attach the Gender-specific coefficients to every observation.
merge m:1 Gender using "${temp}coefficients.dta", nogenerate

* Sample restrictions: three courses, a recorded GCSE A* count, and no males
* with WinterPool==0.
keep if Course == "Economics" | Course == "Engineering" | Course == "Mathematics"
keep if GCSEAStarCount != .
keep if WinterPool != 0 | Gender != "Male"

* Standardise the GCSE A* count using the mean and SD of this sample.
egen mean_astar = mean(GCSEAStarCount)
egen sd_astar   = sd(GCSEAStarCount)
generate std_astar = (GCSEAStarCount - mean_astar) / sd_astar

* Linear prediction from the merged coefficients.
generate y1predictions = _b_cons + _b_std_astar*std_astar + _b_std_maths_percent*std_maths_percent + _b_maths_modules*maths_modules

* A missing offer is counted as no offer.
replace Offers = 0 if Offers == .
ttest Offers, by(Gender)
* Offer rates recorded from the t-test output: pooled male 0.21,
* directly admitted female 0.32.
* NOTE(review): the two yline() values below are hard-coded from that output
* and must be updated by hand if the data change.

distplot y1predictions if inrange(y1predictions, -2, 2), over(Gender) ///
    yline(0.21, lpattern(dash) lcolor(sienna)) ///
    yline(0.32, lpattern(dash) lcolor(navy)) ///
    xtitle("Predicted Y1 performance") ///
    subtitle("Pooled male vs directly admitted female") ///
    note("Dashed lines show % admitted from each group") ///
    graphregion(fc(white))

graph export "${graphs}predicted_Y1_cdf.pdf", as(pdf) replace 
